/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */ /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */ package net.nutch.fetcher; import net.nutch.util.LogFormatter; import net.nutch.util.NutchConf; import net.nutch.util.StringUtil; import java.net.URL; import java.util.Date; import java.util.HashSet; import java.util.logging.Logger; import java.util.logging.Level; import java.util.logging.Handler; /** * Utility class for reporting events to the Fetcher log, and for * generating summary reports based on event counts. * */ public final class FetcherStatus implements FetcherConstants { public static final Logger LOG= LogFormatter.getLogger("net.nutch.fetcher.FetcherStatus"); private static final boolean LOG_SUCCESS= NutchConf.getBoolean("fetcher.trace.success", false); private static final boolean LOG_NOT_FOUND= NutchConf.getBoolean("fetcher.trace.not.found", false); private static final boolean USE_LONG_TRACE_MSGS= NutchConf.getBoolean("fetcher.trace.longmsg", false); private static final String NEWLINE_STRING= System.getProperty("line.separator"); private static final String URL_UNKNOWN= "url_unknown"; private static final long SECONDS_TO_MS_MULTIPLIER= 1000; /** * The <code>String</code> length of the longest MISC_* constant's * prettyName. */ public static final int MISC_PRETTY_NAME_MAX_LEN; /** * The <code>String</code> length of the longest FAIL_* constant's * prettyName. */ public static final int FAILURE_PRETTY_NAME_MAX_LEN; /** * The <code>String</code> length of the longest ERR_* constant's * prettyName. */ public static final int ERROR_PRETTY_NAME_MAX_LEN; /** * The <code>String</code> length of the longest OUT_* constant's * prettyName. */ public static final int OUT_PRETTY_NAME_MAX_LEN; static { int max= 0; for (int i= 0; i < NUM_MISC_CODES; i++) if (getMiscInfoPrettyName(i).length() > max) max= getMiscInfoPrettyName(i).length(); MISC_PRETTY_NAME_MAX_LEN= max; } static { int max= 0; for (int i= 0; i < NUM_FAIL_REASONS; i++) if (getFailurePrettyName(i).length() > max) max= getFailurePrettyName(i).length(); FAILURE_PRETTY_NAME_MAX_LEN= max; } static { int max= 0; for (int i= 0; i < NUM_ERR_REASONS; i++) if (getErrorPrettyName(i).length() > max) max= getErrorPrettyName(i).length(); ERROR_PRETTY_NAME_MAX_LEN= max; } static { int max= 0; for (int i= 0; i < NUM_OUT_STATUS; i++) if (getOutputStatusPrettyName(i).length() > max) max= getOutputStatusPrettyName(i).length(); OUT_PRETTY_NAME_MAX_LEN= max; } private long requestsIssued; private long fetchListRequestsIssued; private long robotsRequestsIssued; private long http11RequestsIssued; private long requestRetries; private long fetchListRequestRetries; private long robotsRequestRetries; private long requestRedirects; private long fetchListRequestRedirects; private long robotsRequestRedirects; private long requestsSucceeded; private long fetchListRequestsSucceeded; private long robotsRequestsSucceeded; private long[] requestsFailedByReason= new long[NUM_FAIL_REASONS]; private long[] fetchListRequestsFailedByReason= new long[NUM_FAIL_REASONS]; private long[] robotsRequestsFailedByReason= new long[NUM_FAIL_REASONS]; private long[] requestErrorsByReason= new long[NUM_ERR_REASONS]; private long[] fetchListRequestErrorsByReason= new long[NUM_ERR_REASONS]; private long[] robotsRequestErrorsByReason= new long[NUM_ERR_REASONS]; private long[] outputStatusCounts= new long[NUM_OUT_STATUS]; private long getRequestAttempts; private long getRequestAllBusy; private long getRequestThrottled; private long getRequestFoundExcluded; private long getRequestFoundNotReady; private long getRequestSuccesses; private long outputQueueAdds; private long outputQueueAdded; private long outputQueueAddDelays; private long outputQueuePopAttempts; private long outputQueuePopped; private long outputQueuePopNoDelay; private long outputQueueEmpty; private long bytesFetched; private long bytesTransferred; private long fetchListBytesFetched; private long robotsBytesFetched; private long requestsReadFromFetchList; private long droppedOnFloor; private long rawBytesSent; private long rawBytesRecieved; private long numCompressedTransfers; private long numContinues; private long startTime; private long endTime; public FetcherStatus() { reset(); } public void reset() { requestsIssued= 0; fetchListRequestsIssued= 0; robotsRequestsIssued= 0; http11RequestsIssued= 0; requestRetries= 0; fetchListRequestRetries= 0; robotsRequestRetries= 0; requestRedirects= 0; fetchListRequestRedirects= 0; robotsRequestRedirects= 0; requestsSucceeded= 0; fetchListRequestsSucceeded= 0; robotsRequestsSucceeded= 0; for (int i= 0; i < NUM_FAIL_REASONS; i++) { requestsFailedByReason[i]= 0; fetchListRequestsFailedByReason[i]= 0; robotsRequestsFailedByReason[i]= 0; } for (int i= 0; i < NUM_ERR_REASONS; i++) { requestErrorsByReason[i]= 0; fetchListRequestErrorsByReason[i]= 0; robotsRequestErrorsByReason[i]= 0; } for (int i= 0; i < NUM_OUT_STATUS; i++) outputStatusCounts[i]= 0; getRequestAttempts= 0; getRequestAllBusy= 0; getRequestThrottled= 0; getRequestFoundExcluded= 0; getRequestFoundNotReady= 0; getRequestSuccesses= 0; outputQueueAdds= 0; outputQueueAdded= 0; outputQueueAddDelays= 0; outputQueuePopAttempts= 0; outputQueuePopped= 0; outputQueuePopNoDelay= 0; outputQueueEmpty= 0; bytesFetched= 0; bytesTransferred= 0; fetchListBytesFetched= 0; robotsBytesFetched= 0; requestsReadFromFetchList= 0; droppedOnFloor= 0; rawBytesSent= 0; rawBytesRecieved= 0; numCompressedTransfers= 0; numContinues= 0; startTime= new Date().getTime(); endTime= -1; } void dispatchingToFetcherThread(RequestRecord request) { requestsIssued++; if (request.isRobotsRequest()) robotsRequestsIssued++; else fetchListRequestsIssued++; } void readFromFetchlist() { requestsReadFromFetchList++; } void requestFailed(RequestRecord request) { logTraceReqFailure(request); requestsFailedByReason[request.getFailureReason()]++; if (request.isRobotsRequest()) robotsRequestsFailedByReason[request.getFailureReason()]++; else fetchListRequestsFailedByReason[request.getFailureReason()]++; } void requestError(RequestRecord request) { logTraceReqError(request); requestErrorsByReason[request.getErrorReason()]++; if (request.isRobotsRequest()) robotsRequestErrorsByReason[request.getErrorReason()]++; else fetchListRequestErrorsByReason[request.getErrorReason()]++; } void succeeded(RequestRecord request) { if (LOG_SUCCESS) logTraceMisc(MISC_FETCH_SUCCESS, request.getOriginalRequest().getURLString()); long bytes= request.getResponse().getContent().length; long tBytes= (request.getResponse().getCompressedContent() != null) ? request.getResponse().getCompressedContent().length : request.getResponse().getContent().length; requestsSucceeded++; bytesFetched+= bytes; bytesTransferred+= tBytes; if (request.isRobotsRequest()) { robotsRequestsSucceeded++; robotsBytesFetched+= bytes; } else { fetchListRequestsSucceeded++; fetchListBytesFetched+= bytes; } } void retry(RequestRecord request) { requestRetries++; if (request.isRobotsRequest()) robotsRequestRetries++; else fetchListRequestRetries++; } void redirected(RequestRecord request) { requestRedirects++; if (request.isRobotsRequest()) robotsRequestRedirects++; else fetchListRequestRedirects++; } void droppedOnFloor(RequestRecord request) { droppedOnFloor++; } void incrementOutputQueueAdd(int numAdded) { outputQueueAdds++; outputQueueAdded+= numAdded; } void incrementOutputQueueFull() { outputQueueAddDelays++; } void incrementOutputQueuePopNoDelay() { outputQueuePopAttempts++; outputQueuePopNoDelay++; } void incrementOutputQueuePopped() { outputQueuePopped++; } void incrementOutputQueueEmpty() { outputQueuePopAttempts++; outputQueueEmpty++; } void outputStatus(RequestRecord request, String urlString) { logTraceOutputStatus(request, urlString); outputStatusCounts[request.getOutputStatus()]++; } void incrementGetRequestAttempts() { getRequestAttempts++; } void incrementGetRequestAllBusy() { getRequestAllBusy++; } void incrementGetRequestThrottled() { getRequestThrottled++; } void incrementGetRequestSuccesses() { getRequestSuccesses++; } void incrementGetRequestFoundExcluded() { getRequestFoundExcluded++; } void incrementGetRequestFoundNotReady() { getRequestFoundNotReady++; } void incrementRawBytes(long sent, long received) { rawBytesSent+= sent; rawBytesRecieved+= received; } void incrementContinues(int continues) { numContinues+= continues; } public void logStats() { int code= MISC_STATS; long end= endTime; if (end == -1) end= new Date().getTime(); long totSecs= (end - startTime) / SECONDS_TO_MS_MULTIPLIER; long totMinutes= totSecs / 60; long hours= totSecs / 3600; long partialMinutes= (totSecs % 3600) / 60; long partialSecs= (totSecs % 3600) % 60; long tmp; if (totSecs == 0) totSecs= 1; if (totMinutes == 0) totMinutes= 1; StringBuffer buf= new StringBuffer(); if (endTime == -1) buf.append("RequestScheduler running for "); else buf.append("Diff stats over "); if (hours > 0) buf.append(hours).append(":"); if (partialMinutes < 10) buf.append("0"); buf.append(partialMinutes).append(":"); if (partialSecs < 10) buf.append("0"); buf.append(partialSecs); buf.append(" ("); buf.append(totSecs).append(" seconds)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); if (endTime != -1) { buf.append(" from ").append(new Date(startTime).toString()); buf.append(" to ").append(new Date(endTime).toString()); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); } // Requests buf.append(" Requests (rate): ").append(requestsIssued); buf.append("\t(").append(requestsIssued/totSecs).append(" req/sec)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); buf.append(" fetchList: ").append(fetchListRequestsIssued); buf.append("\t(").append(fetchListRequestsIssued/totSecs); buf.append(" req/sec)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); buf.append(" robots.txt: ").append(robotsRequestsIssued); buf.append("\t(").append(robotsRequestsIssued/totSecs); buf.append(" req/sec)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); // Retries tmp= requestsIssued == 0 ? 0 : (requestRetries * 100 / requestsIssued); buf.append(" Retries (rate): ").append(requestRetries); buf.append("\t(").append(tmp); buf.append("% retry/tot)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); tmp= fetchListRequestsIssued == 0 ? 0 : (fetchListRequestRetries * 100 / fetchListRequestsIssued); buf.append(" fetchList: ").append(fetchListRequestRetries); buf.append("\t("); buf.append(tmp); buf.append("%)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); tmp= robotsRequestsIssued == 0 ? 0 : (robotsRequestRetries * 100 / robotsRequestsIssued); buf.append(" robots.txt: ").append(robotsRequestRetries); buf.append("\t(").append(tmp); buf.append("%)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); // Redirects tmp= requestsIssued == 0 ? 0 : (requestRedirects * 100 / requestsIssued); buf.append("Redirects (rate): ").append(requestRedirects); buf.append("\t(").append(tmp); buf.append("% redirect/tot)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); tmp= fetchListRequestsIssued == 0 ? 0 : (fetchListRequestRedirects * 100 / fetchListRequestsIssued); buf.append(" fetchList: ").append(fetchListRequestRedirects); buf.append("\t("); buf.append(tmp); buf.append("%)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); tmp= robotsRequestsIssued == 0 ? 0 : (robotsRequestRedirects * 100 / robotsRequestsIssued); buf.append(" robots.txt: ").append(robotsRequestRedirects); buf.append("\t(").append(tmp); buf.append("%)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); // Succeeded tmp= requestsIssued == 0 ? 0 : (requestsSucceeded * 100 / requestsIssued); buf.append("Succeeded (rate): ").append(requestsSucceeded); buf.append("\t(").append(tmp); buf.append("% succ/req)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); tmp= fetchListRequestsIssued == 0 ? 0 : (fetchListRequestsSucceeded * 100 / fetchListRequestsIssued); buf.append(" fetchList: ").append(fetchListRequestsSucceeded); buf.append("\t("); buf.append(tmp); buf.append("%)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); tmp= robotsRequestsIssued == 0 ? 0 : (robotsRequestsSucceeded * 100 / robotsRequestsIssued); buf.append(" robots.txt: ").append(robotsRequestsSucceeded); buf.append("\t(").append(tmp); buf.append("%)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); // Failures long allFail= 0; long fetchListAllFail= 0; long robotsAllFail= 0; for (int i= 0; i < NUM_FAIL_REASONS; i++) { allFail+= requestsFailedByReason[i]; fetchListAllFail+= fetchListRequestsFailedByReason[i]; robotsAllFail+= robotsRequestsFailedByReason[i]; } // Failures buf.append("Failures (not retryable):"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); buf.append(" \t"); buf.append("All \tfetchList\trobots"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); for (int errCode= 0; errCode < NUM_FAIL_REASONS; errCode++) { String errDesc= StringUtil.rightPad(getFailurePrettyName(errCode), FAILURE_PRETTY_NAME_MAX_LEN); buf.append(errDesc).append("\t"); buf.append(requestsFailedByReason[errCode]).append("\t"); buf.append(fetchListRequestsFailedByReason[errCode]).append("\t"); buf.append(robotsRequestsFailedByReason[errCode]); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); } buf.append( StringUtil.rightPad("Total", FAILURE_PRETTY_NAME_MAX_LEN)); buf.append("\t"); buf.append(allFail).append("\t"); buf.append(fetchListAllFail).append("\t"); buf.append(robotsAllFail); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); // Errors long allErr= 0; long fetchListAllErr= 0; long robotsAllErr= 0; for (int i= 0; i < NUM_ERR_REASONS; i++) { allErr+= requestErrorsByReason[i]; fetchListAllErr+= fetchListRequestErrorsByReason[i]; robotsAllErr+= robotsRequestErrorsByReason[i]; } // Errors buf.append("Errors (retryable):"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); buf.append(" \t"); buf.append("All \tfetchList\trobots"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); for (int errCode= 0; errCode < NUM_ERR_REASONS; errCode++) { String errDesc= StringUtil.rightPad(getErrorPrettyName(errCode), ERROR_PRETTY_NAME_MAX_LEN); buf.append(errDesc).append("\t"); buf.append(requestErrorsByReason[errCode]).append("\t"); buf.append(fetchListRequestErrorsByReason[errCode]).append("\t"); buf.append(robotsRequestErrorsByReason[errCode]); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); } buf.append(StringUtil.rightPad("Total", ERROR_PRETTY_NAME_MAX_LEN)); buf.append("\t"); buf.append(allErr).append("\t"); buf.append(fetchListAllErr).append("\t"); buf.append(robotsAllErr); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); // Output long allOut= 0; for (int i= 0; i < NUM_OUT_STATUS; i++) { allOut+= outputStatusCounts[i]; } // Output buf.append("Output stats:"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); for (int statusCode= 0; statusCode < NUM_OUT_STATUS; statusCode++) { String statDesc= StringUtil.rightPad( getOutputStatusPrettyName(statusCode), OUT_PRETTY_NAME_MAX_LEN); buf.append(statDesc).append("\t"); buf.append(outputStatusCounts[statusCode]); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); } buf.append( StringUtil.rightPad("Total", OUT_PRETTY_NAME_MAX_LEN)); buf.append("\t"); buf.append(allOut); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); // getRequests buf.append("Fetcher polling (all but Succeed cause delays):"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); buf.append(" Polls: "); buf.append(getRequestAttempts); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); tmp= getRequestAttempts == 0 ? 0 : (getRequestSuccesses * 100 / getRequestAttempts); buf.append(" Succeeded: "); buf.append(getRequestSuccesses); buf.append("\t("); buf.append(tmp); buf.append("%)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); tmp= getRequestAllBusy == 0 ? 0 : (getRequestAllBusy * 100 / getRequestAttempts); buf.append(" Host Qs Busy: "); buf.append(getRequestAllBusy); buf.append("\t("); buf.append(tmp); buf.append("%)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); // outputQueue tmp= outputQueueAdds == 0 ? 0 : (outputQueueAddDelays / outputQueueAdds); buf.append("Fetcher delays due to Output Q Full: "); buf.append(outputQueueAddDelays); buf.append("\t("); buf.append(tmp); buf.append("%)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); tmp= outputQueueAdds == 0 ? 0 : (outputQueueAdded * 100 / outputQueueAdds); buf.append("Requests added to Output Q (per add): "); buf.append(outputQueueAdded); buf.append("\t("); buf.append( (float) tmp / 100.0f ); buf.append(")"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); buf.append("Output polling:"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); buf.append(" Polls: "); buf.append(outputQueuePopAttempts); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); buf.append(" Pops: "); buf.append(outputQueuePopped); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); tmp= outputQueuePopAttempts == 0 ? 0 : (outputQueuePopNoDelay * 100 / outputQueuePopAttempts); buf.append(" Pops w/o delay: "); buf.append(outputQueuePopNoDelay); buf.append("\t("); buf.append(tmp); buf.append("%)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); tmp= outputQueuePopAttempts == 0 ? 0 : (outputQueueEmpty * 100 / outputQueuePopAttempts); buf.append(" Output Q empty: "); buf.append(outputQueueEmpty); buf.append("\t("); buf.append(tmp); buf.append("%)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); // content bandwidth buf.append("actual content bytes fetched: "); buf.append(bytesTransferred); buf.append("\t("); buf.append(bytesTransferred * 8 / 1024 / totSecs); buf.append(" kbits/s avg)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); buf.append("effective content bytes: "); buf.append(bytesFetched); buf.append("\t("); buf.append(bytesFetched * 8 / 1024 / totSecs); buf.append(" kbits/s)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); tmp= bytesFetched == 0 ? 0 : ((bytesFetched - bytesTransferred) * 1000) / bytesFetched; buf.append("content bandwidth savings (compression): "); buf.append(bytesFetched - bytesTransferred); buf.append("\t("); buf.append( ( (float) tmp ) / 10.0f); buf.append("%)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); tmp= bytesFetched == 0 ? 0 : (fetchListBytesFetched * 100 / bytesFetched); buf.append("effective fetchlist bytes fetched: "); buf.append(fetchListBytesFetched); buf.append("\t("); buf.append(tmp); buf.append("%)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); tmp= bytesFetched == 0 ? 0 : (robotsBytesFetched * 100 / bytesFetched); buf.append("effective robots bytes fetched: "); buf.append(robotsBytesFetched); buf.append("\t("); buf.append(tmp); buf.append("%)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); buf.append("raw bytes read: "); buf.append(rawBytesRecieved); buf.append("\t("); buf.append(rawBytesRecieved * 8 / 1024 / totSecs); buf.append(" kbits/s)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); buf.append("raw bytes sent: "); buf.append(rawBytesSent); buf.append("\t("); buf.append(rawBytesSent * 8 / 1024 / totSecs); buf.append(" kbits/s)"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); buf.append(requestsReadFromFetchList); buf.append(" requests have been read from the FetchList"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); buf.append(outputQueuePopped); buf.append(" requests have been dispatched for output"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); buf.append(droppedOnFloor); buf.append(" requests have been dropped on the floor"); FetcherStatus.logTraceMisc(code, buf.toString()); buf.setLength(0); } /** * Returns an approximation of the raw bandwidth used, in kbits/s, * for the period of time this FetcherStatus was measured over. * * <p> * * This is meant to include all bandwidth the fetcher used except * for DNS and TCP/IP overhead. */ public int getRawBandwidth() { long end= endTime; if (end == -1) end= new Date().getTime(); long totSecs= (end - startTime) / SECONDS_TO_MS_MULTIPLIER; if (totSecs == 0) totSecs= 1; return (int) (rawBytesRecieved * 8 / 1024 / totSecs); } /** * Returns a copy of this FetcherStatus object that reflects a * "snapshot"; it is identical with the possible exception of the * "end time", which is set to the current time if <code>this</code> * does not have an end time. * * <p> * * It is assumed that the returned object will never be updated. */ public FetcherStatus cloneStatus() { FetcherStatus other= new FetcherStatus(); other.requestsIssued= this.requestsIssued; other.fetchListRequestsIssued= this.fetchListRequestsIssued; other.robotsRequestsIssued= this.robotsRequestsIssued; other.http11RequestsIssued= this.http11RequestsIssued; other.requestRetries= this.requestRetries; other.fetchListRequestRetries= this.fetchListRequestRetries; other.robotsRequestRetries= this.robotsRequestRetries; other.requestRedirects= this.requestRedirects; other.fetchListRequestRedirects= this.fetchListRequestRedirects; other.robotsRequestRedirects= this.robotsRequestRedirects; other.requestsSucceeded= this.requestsSucceeded; other.fetchListRequestsSucceeded= this.fetchListRequestsSucceeded; other.robotsRequestsSucceeded= this.robotsRequestsSucceeded; for (int i= 0; i < NUM_FAIL_REASONS; i++) { other.requestsFailedByReason[i]= this.requestsFailedByReason[i]; other.fetchListRequestsFailedByReason[i]= this.fetchListRequestsFailedByReason[i]; other.robotsRequestsFailedByReason[i]= this.robotsRequestsFailedByReason[i]; } for (int i= 0; i < NUM_ERR_REASONS; i++) { other.requestErrorsByReason[i]= this.requestErrorsByReason[i]; other.fetchListRequestErrorsByReason[i]= this.fetchListRequestErrorsByReason[i]; other.robotsRequestErrorsByReason[i]= this.robotsRequestErrorsByReason[i]; } for (int i= 0; i < NUM_OUT_STATUS; i++) { other.outputStatusCounts[i]= this.outputStatusCounts[i]; } other.getRequestAttempts= this.getRequestAttempts; other.getRequestAllBusy= this.getRequestAllBusy; other.getRequestThrottled= this.getRequestThrottled; other.getRequestFoundExcluded= this.getRequestFoundExcluded; other.getRequestFoundNotReady= this.getRequestFoundNotReady; other.getRequestSuccesses= this.getRequestSuccesses; other.outputQueueAdds= this.outputQueueAdds; other.outputQueueAdded= this.outputQueueAdded; other.outputQueueAddDelays= this.outputQueueAddDelays; other.outputQueuePopAttempts= this.outputQueuePopAttempts; other.outputQueuePopped= this.outputQueuePopped; other.outputQueuePopNoDelay= this.outputQueuePopNoDelay; other.outputQueueEmpty= this.outputQueueEmpty; other.bytesFetched= this.bytesFetched; other.bytesTransferred= this.bytesTransferred; other.fetchListBytesFetched= this.fetchListBytesFetched; other.robotsBytesFetched= this.robotsBytesFetched; other.requestsReadFromFetchList= this.requestsReadFromFetchList; other.droppedOnFloor= this.droppedOnFloor; other.rawBytesSent= this.rawBytesSent; other.rawBytesRecieved= this.rawBytesRecieved; other.numCompressedTransfers= this.numCompressedTransfers; other.numContinues= this.numContinues; other.startTime= this.startTime; if (endTime == -1) other.endTime= new Date().getTime(); else other.endTime= endTime; return other; } /** * Returns a FetcherStatus object that reflects a "delta" * between this status, and an <code>earlierStatus</code>. * * <p> * * Almost all fields are simply set to <code>(this.field - * earlierStatus.field)</code>. The exception being the time * boundaries- the start time value is taken from * <code>earlierStatus</code>'s end time and the end time is taken * from <code>this</code>'s end time. * * <p> * * For the resulting FetcherStatus to be meaningful, the start times * of <code>this</code> and <code>earlierStatus</code> should be the * same, but this is not enforced. */ public FetcherStatus getDelta(FetcherStatus earlierStatus) { FetcherStatus delta= new FetcherStatus(); delta.requestsIssued= this.requestsIssued - earlierStatus.requestsIssued; delta.fetchListRequestsIssued= this.fetchListRequestsIssued - earlierStatus.fetchListRequestsIssued; delta.robotsRequestsIssued= this.robotsRequestsIssued - earlierStatus.robotsRequestsIssued; delta.http11RequestsIssued= this.http11RequestsIssued - earlierStatus.http11RequestsIssued; delta.requestRetries= this.requestRetries - earlierStatus.requestRetries; delta.fetchListRequestRetries= this.fetchListRequestRetries - earlierStatus.fetchListRequestRetries; delta.robotsRequestRetries= this.robotsRequestRetries - earlierStatus.robotsRequestRetries; delta.requestRedirects= this.requestRedirects - earlierStatus.requestRedirects; delta.fetchListRequestRedirects= this.fetchListRequestRedirects - earlierStatus.fetchListRequestRedirects; delta.robotsRequestRedirects= this.robotsRequestRedirects - earlierStatus.robotsRequestRedirects; delta.requestsSucceeded= this.requestsSucceeded - earlierStatus.requestsSucceeded; delta.fetchListRequestsSucceeded= this.fetchListRequestsSucceeded - earlierStatus.fetchListRequestsSucceeded; delta.robotsRequestsSucceeded= this.robotsRequestsSucceeded - earlierStatus.robotsRequestsSucceeded; for (int i= 0; i < NUM_FAIL_REASONS; i++) { delta.requestsFailedByReason[i]= this.requestsFailedByReason[i] - earlierStatus.requestsFailedByReason[i]; delta.fetchListRequestsFailedByReason[i]= this.fetchListRequestsFailedByReason[i] - earlierStatus.fetchListRequestsFailedByReason[i]; delta.robotsRequestsFailedByReason[i]= this.robotsRequestsFailedByReason[i] - earlierStatus.robotsRequestsFailedByReason[i]; } for (int i= 0; i < NUM_ERR_REASONS; i++) { delta.requestErrorsByReason[i]= this.requestErrorsByReason[i] - earlierStatus.requestErrorsByReason[i]; delta.fetchListRequestErrorsByReason[i]= this.fetchListRequestErrorsByReason[i] - earlierStatus.fetchListRequestErrorsByReason[i]; delta.robotsRequestErrorsByReason[i]= this.robotsRequestErrorsByReason[i] - earlierStatus.robotsRequestErrorsByReason[i]; } for (int i= 0; i < NUM_OUT_STATUS; i++) { delta.outputStatusCounts[i]= this.outputStatusCounts[i] - earlierStatus.outputStatusCounts[i]; } delta.getRequestAttempts= this.getRequestAttempts - earlierStatus.getRequestAttempts; delta.getRequestAllBusy= this.getRequestAllBusy - earlierStatus.getRequestAllBusy; delta.getRequestThrottled= this.getRequestThrottled - earlierStatus.getRequestThrottled; delta.getRequestFoundExcluded= this.getRequestFoundExcluded - earlierStatus.getRequestFoundExcluded; delta.getRequestFoundNotReady= this.getRequestFoundNotReady - earlierStatus.getRequestFoundNotReady; delta.getRequestSuccesses= this.getRequestSuccesses - earlierStatus.getRequestSuccesses; delta.outputQueueAdds= this.outputQueueAdds - earlierStatus.outputQueueAdds; delta.outputQueueAdded= this.outputQueueAdded - earlierStatus.outputQueueAdded; delta.outputQueueAddDelays= this.outputQueueAddDelays - earlierStatus.outputQueueAddDelays; delta.outputQueuePopAttempts= this.outputQueuePopAttempts - earlierStatus.outputQueuePopAttempts; delta.outputQueuePopped= this.outputQueuePopped - earlierStatus.outputQueuePopped; delta.outputQueuePopNoDelay= this.outputQueuePopNoDelay - earlierStatus.outputQueuePopNoDelay; delta.outputQueueEmpty= this.outputQueueEmpty - earlierStatus.outputQueueEmpty; delta.bytesFetched= this.bytesFetched - earlierStatus.bytesFetched; delta.bytesTransferred= this.bytesTransferred - earlierStatus.bytesTransferred; delta.fetchListBytesFetched= this.fetchListBytesFetched - earlierStatus.fetchListBytesFetched; delta.robotsBytesFetched= this.robotsBytesFetched - earlierStatus.robotsBytesFetched; delta.requestsReadFromFetchList= this.requestsReadFromFetchList - earlierStatus.requestsReadFromFetchList; delta.droppedOnFloor= this.droppedOnFloor - earlierStatus.droppedOnFloor; delta.rawBytesSent= this.rawBytesSent - earlierStatus.rawBytesSent; delta.rawBytesRecieved= this.rawBytesRecieved - earlierStatus.rawBytesRecieved; delta.numCompressedTransfers= this.numCompressedTransfers - earlierStatus.numCompressedTransfers; delta.numContinues= this.numContinues - earlierStatus.numContinues; delta.startTime= earlierStatus.endTime; if (endTime == -1) delta.endTime= new Date().getTime(); else delta.endTime= endTime; return delta; } /** * Logs a mapping of "terse names" to "pretty names" for all * error/failure/status messages. */ public static void logKeys() { HashSet dedup= new HashSet(); logTraceMisc(MISC_KEY, "Fetch Error Codes:"); for (int i= 0; i < NUM_ERR_REASONS; i++) { if (dedup.contains(getErrorTerseName(i))) LOG.severe("duplicate terse name: " + getErrorTerseName(i)); dedup.add(getErrorTerseName(i)); logTraceMisc(MISC_KEY, getErrorTerseName(i) + "\t" + getErrorPrettyName(i)); } logTraceMisc(MISC_KEY, "Fetch Failure Codes:"); for (int i= 0; i < NUM_FAIL_REASONS; i++) { if (dedup.contains(getFailureTerseName(i))) LOG.severe("duplicate terse name: " + getErrorTerseName(i)); dedup.add(getFailureTerseName(i)); logTraceMisc(MISC_KEY, getFailureTerseName(i) + "\t" + getFailurePrettyName(i)); } logTraceMisc(MISC_KEY, "Fetch Output Error Codes:"); for (int i= 0; i < NUM_OUT_STATUS; i++) { if (dedup.contains(getOutputStatusTerseName(i))) LOG.severe("duplicate terse name: " + getErrorTerseName(i)); dedup.add(getOutputStatusTerseName(i)); logTraceMisc(MISC_KEY, getOutputStatusTerseName(i) + "\t" + getOutputStatusPrettyName(i)); } logTraceMisc(MISC_KEY, "Fetch Misc Event Codes:"); for (int i= 0; i < NUM_MISC_CODES; i++) { if (dedup.contains(getMiscInfoTerseName(i))) LOG.severe("duplicate terse name: " + getErrorTerseName(i)); dedup.add(getMiscInfoTerseName(i)); logTraceMisc(MISC_KEY, getMiscInfoTerseName(i) + "\t" + getMiscInfoPrettyName(i)); } } /** * Prints a "trace" line to the log at <code>Level.INFO</code>. * Only one of <code>longMsg</code> or <code>shortMsg</code> will * be printed, followed by the <code>urlString</code> and the * additional messages, if any. */ public static void logTrace(String longMsg, String shortMsg, String urlString, String[] msgs) { String mainMsg= USE_LONG_TRACE_MSGS ? longMsg : shortMsg; if (urlString.startsWith("http://")) urlString= urlString.substring(7); StringBuffer logMsg= new StringBuffer(mainMsg); logMsg.append(" ").append(urlString); if (msgs != null) for (int i= 0; i < msgs.length; i++) logMsg.append(" ").append(msgs[i]); LOG.info(logMsg.toString()); } /** * Prints a "trace" line to the log at <code>Level.INFO</code>. * Only one of <code>longMsg</code> or <code>shortMsg</code> will * be printed, followed by the <code>url</code> and the * additional messages, if any. */ public static void logTrace(String longMsg, String shortMsg, URL url, String[] msgs) { if (url != null) logTrace(longMsg, shortMsg, url.toString(), msgs); else logTrace(longMsg, shortMsg, URL_UNKNOWN, msgs); } /** * Prints a "trace" line to the log at <code>Level.INFO</code>. * Only one of <code>longMsg</code> or <code>shortMsg</code> will * be printed, followed by the <code>urlString</code>. */ private static void logTrace(String longMsg, String shortMsg, String urlString) { logTrace(longMsg, shortMsg, urlString, null); } /** * Prints a "trace" line to the log at <code>Level.INFO</code>. * Only one of <code>longMsg</code> or <code>shortMsg</code> will * be printed, followed by the <code>url</code>. */ private static void logTrace(String longMsg, String shortMsg, URL url) { if (url != null) logTrace(longMsg, shortMsg, url.toString(), null); else logTrace(longMsg, shortMsg, URL_UNKNOWN, null); } /** * Prints an appropriate "trace" line to the log, reflecting the * failure code in the supplied <code>request</code>. */ public static void logTraceReqFailure(RequestRecord request) { int failCode= request.getFailureReason(); if ( (failCode == FAIL_NOT_FOUND) && (!LOG_NOT_FOUND) ) return; String longMsg= getFailurePrettyName(failCode); String shortMsg= getFailureTerseName(failCode); logTrace(longMsg, shortMsg, request.getOriginalRequest().getURLString(), request.getFailureMessages()); } /** * Prints an appropriate "trace" line to the log, reflecting the * error code in the supplied <code>request</code>. */ public static void logTraceReqError(RequestRecord request) { int errCode= request.getErrorReason(); String longMsg= getErrorPrettyName(errCode); String shortMsg= getErrorTerseName(errCode); logTrace(longMsg, shortMsg, request.getOriginalRequest().getURLString(), request.getErrorMessages()); } /** * Prints an appropriate "trace" line to the log, reflecting the * failure code in the supplied <code>request</code>. */ public static void logTraceOutputStatus(RequestRecord request, String urlString) { int statusCode= request.getOutputStatus(); if ( (statusCode != OUT_OK) || LOG_SUCCESS) { String longMsg= getOutputStatusPrettyName(statusCode); String shortMsg= getOutputStatusTerseName(statusCode); logTrace(longMsg, shortMsg, urlString, request.getOutputStatusMessages()); } } /** * Prints an appropriate "trace" line to the log, reflecting the * miscellaneous event code (see the <code>MISC_*<code> constants) * and the given URL. */ public static void logTraceMisc(int miscCode, URL url) { String longMsg= getMiscInfoPrettyName(miscCode); String shortMsg= getMiscInfoTerseName(miscCode); logTrace(longMsg, shortMsg, url.toString()); } /** * Prints an appropriate "trace" line to the log, reflecting the * miscellaneous event code (see the <code>MISC_*<code> constants) * and the given message String. */ public static void logTraceMisc(int miscCode, String msg) { String longMsg= getMiscInfoPrettyName(miscCode); String shortMsg= getMiscInfoTerseName(miscCode); logTrace(longMsg, shortMsg, msg); } private static String getMiscInfoPrettyName(int miscCode) { switch (miscCode) { case MISC_STATS: return "Stats"; case MISC_KEY: return "Logging code key"; case MISC_ROBOTS_FORBIDDEN: return "Robots Req Forbidden"; case MISC_META_NOINDEX: return "META NOINDEX"; case MISC_META_NOFOLLOW: return "META NOFOLLOW"; case MISC_META_NOCACHE: return "META NOCACHE"; case MISC_FETCH_SUCCESS: return "Successful Fetch"; case MISC_INFORMATIONAL: return "Informational"; default: return "*MISC: No Displayable Form* (" + miscCode + ")"; } } private static String getMiscInfoTerseName(int miscCode) { switch (miscCode) { case MISC_STATS: return "STS"; case MISC_KEY: return "KEY"; case MISC_ROBOTS_FORBIDDEN: return "RFB"; case MISC_META_NOINDEX: return "NIN"; case MISC_META_NOFOLLOW: return "NFO"; case MISC_META_NOCACHE: return "NCA"; case MISC_FETCH_SUCCESS: return "SUC"; case MISC_INFORMATIONAL: return "INF"; default: return "MISC" + miscCode; } } /** * Returns a displayable form for the <code>failureReason</code>. * The result is meant for human consumption. */ public static String getFailurePrettyName(int failureReason) { switch (failureReason) { case FAIL_UNKNOWN: return "Unknown Failure"; case FAIL_BAD_URL: return "Bad URL"; case FAIL_ROBOTS_EXCLUDED: return "Robots Excluded"; case FAIL_TOO_MANY_ERRORS: return "Max Errors"; case FAIL_TOO_MANY_REDIRECTS: return "Max Redirects"; case FAIL_REDIRECT_MISSING_TARGET: return "Redirect Missing Target"; case FAIL_NOT_FOUND: return "Not Found"; case FAIL_FORBIDDEN: return "Forbidden"; case FAIL_REDIRECT_LOOP_DETECTED: return "Redirect Loop"; case FAIL_HOSTNAME_BANNED: return "Hostname Banned"; case FAIL_DEAD_HOST: return "Dead Host"; case FAIL_UNKNOWN_RESP_CODE: return "Unknown Response Code"; case FAIL_UNKNOWN_HOST: return "Unknown Host"; case FAIL_CONNECTION_REFUSED: return "Connection Refused"; default: return "*FAIL: No Displayable Form* (" + failureReason + ")"; } } /** * Returns a terse displayable form for the * <code>failureReason</code>. The result contains no whitespace * and is suitable for consumption by log analysers. */ public static String getFailureTerseName(int failureReason) { switch (failureReason) { case FAIL_UNKNOWN: return "UNF"; case FAIL_BAD_URL: return "URL"; case FAIL_ROBOTS_EXCLUDED: return "XCL"; case FAIL_TOO_MANY_ERRORS: return "ERR"; case FAIL_TOO_MANY_REDIRECTS: return "RDR"; case FAIL_REDIRECT_MISSING_TARGET: return "TGT"; case FAIL_NOT_FOUND: return "FND"; case FAIL_FORBIDDEN: return "FBD"; case FAIL_REDIRECT_LOOP_DETECTED: return "LOP"; case FAIL_HOSTNAME_BANNED: return "BAN"; case FAIL_DEAD_HOST: return "DED"; case FAIL_UNKNOWN_RESP_CODE: return "COD"; case FAIL_UNKNOWN_HOST: return "DNS"; case FAIL_CONNECTION_REFUSED: return "REF"; default: return "FAIL"+failureReason; } } /** * Returns a displayable form for the <code>errorReason</code>. * The result is meant for human consumption. */ public static String getErrorPrettyName(int errorReason) { switch (errorReason) { case ERR_UNKNOWN: return "Unknown Error"; case ERR_CONNECTION_TIMED_OUT: return "Connection Timed Out"; case ERR_BAD_HEADER_LINE: return "Bad Header Line"; case ERR_RESET_BY_PEER: return "Reset By Peer"; case ERR_BAD_STATUS_LINE: return "Bad Status Line"; case ERR_EOF_DURING_READ: return "EOF During Read"; case ERR_NO_ROUTE: return "No Route to Host"; case ERR_SOCKET_TIMEOUT: return "Socket Timeout"; case ERR_NETWORK_UNREACHABLE: return "Network Unreachable"; case ERR_BAD_CONTENT_LENGTH: return "Bad Content-Length"; case ERR_CHUNKLEN_PARSE: return "Bad Chunk Length"; case ERR_CHUNK_EOF: return "EOF in chunk"; case ERR_DECOMPRESS: return "Unzip Failed "; default: return "*ERR: No Displayable Form* (" + errorReason + ")"; } } /** * Returns a terse displayable form for the * <code>errorReason</code>. The result contains no whitespace * and is suitable for consumption by log analysers. */ public static String getErrorTerseName(int errorReason) { switch (errorReason) { case ERR_UNKNOWN: return "UNE"; case ERR_CONNECTION_TIMED_OUT: return "CTO"; case ERR_BAD_HEADER_LINE: return "HED"; case ERR_RESET_BY_PEER: return "RST"; case ERR_BAD_STATUS_LINE: return "STT"; case ERR_EOF_DURING_READ: return "EOF"; case ERR_NO_ROUTE: return "NRT"; case ERR_SOCKET_TIMEOUT: return "STO"; case ERR_NETWORK_UNREACHABLE: return "NUN"; case ERR_BAD_CONTENT_LENGTH: return "CLN"; case ERR_CHUNKLEN_PARSE: return "BCL"; case ERR_CHUNK_EOF: return "CEF"; case ERR_DECOMPRESS: return "ZIP "; default: return "ERR"+errorReason; } } /** * Returns a displayable form for the <code>errorReason</code>. * The result is meant for human consumption. */ public static String getOutputStatusPrettyName(int outputStatus) { switch (outputStatus) { case OUT_OK: return "Output OK"; case OUT_UNKNOWN: return "Unknown output error"; case OUT_DOM_ERROR: return "DOM parse error"; case OUT_DOM_EXCEPTION: return "DOM parser failed"; case OUT_UNKNOWN_CONTENT: return "Unknown Content-Type"; case OUT_ENCODING_ERR: return "Character Encoding Error"; default: return "*OUT: No Displayable Form* (" + outputStatus + ")"; } } /** * Returns a terse displayable form for the * <code>outputStatus</code>. The result contains no whitespace and * is suitable for consumption by log analysers. */ public static String getOutputStatusTerseName(int outputStatus) { switch (outputStatus) { case OUT_OK: return "OOK"; case OUT_UNKNOWN: return "UNO"; case OUT_DOM_ERROR: return "DME"; case OUT_DOM_EXCEPTION: return "DMF"; case OUT_UNKNOWN_CONTENT: return "UCT"; case OUT_ENCODING_ERR: return "CEE"; default: return "OUT"+outputStatus; } } }